*-------------------------------------------------------------------------------
* Create hockey stick function and graph
*-------------------------------------------------------------------------------


* Load the cleaned person-level spell observations.
use "$temp/cleaned_obs_reconfig_hockey_ver$ver.dta", clear

* Remove each person's final spell (ordered by spell start) when it ends in
* non-employment (toid == 1), e.g. retirement.
sort persnr begorig
by persnr: gen byte tag = (_n == _N & toid == 1)
compress
tab tag, mis
drop if tag == 1
drop tag

* Variables not needed for the establishment-level rates.
drop gap tentgelt

* Discard spells whose current establishment id is 1
* (presumably the non-employment code, as for toid -- TODO confirm).
drop if currentid == 1

* Quarter-end indicators: q1-q3 are 1 when the spell starts in or before the
* quarter and ends in a later month, or ends exactly on the quarter's last
* day; q4 is 1 only when the spell ends on 31 December.
* NOTE(review): only month()/day() are compared, so this presumably relies on
* spells never crossing a calendar year -- confirm against the cleaning step.
gen byte q1 = ((month(begorig) <= 3 & month(endorig) >=  4) | (month(endorig) ==  3 & day(endorig) == 31))
gen byte q2 = ((month(begorig) <= 6 & month(endorig) >=  7) | (month(endorig) ==  6 & day(endorig) == 30))
gen byte q3 = ((month(begorig) <= 9 & month(endorig) >= 10) | (month(endorig) ==  9 & day(endorig) == 30))
gen byte q4 = (month(endorig) == 12 & day(endorig) == 31)

* Keep only spells that are flagged for at least one quarter end.
keep if q1 | q2 | q3 | q4

* Establishment-year headcount at each quarter end.
foreach flag of varlist q1 q2 q3 q4 {
	bysort currentid year: egen st_e_`flag' = sum(`flag')
}
compress

* Transition indicators per quarter: ee`k' flags a move to another
* establishment, en`k' a move into non-employment (toid == 1).
* They are 0 where the spell is flagged at quarter end k and missing otherwise.
forvalues k = 1/4 {
	gen byte ee`k' = 0 if q`k' != 0
	gen byte en`k' = 0 if q`k' != 0
}

* Destination tests shared by all quarters.
local to_estab  "(currentid != toid & toid != 1)"
local to_nonemp "(toid == 1)"

* Spells flagged in Q4: the transition is attributed to Q4.
replace ee4 = 1 if ee4 == 0 & `to_estab'
replace en4 = 1 if en4 == 0 & `to_nonemp'

* Earlier quarters: attribute the transition to quarter k only when the
* spell is not flagged in quarter k+1 (its indicator is missing there).
forvalues k = 3(-1)1 {
	dis `k'
	local nxt = `k' + 1
	dis `nxt'
	replace ee`k' = 1 if ee`k' == 0 & `to_estab'  & missing(ee`nxt')
	replace en`k' = 1 if en`k' == 0 & `to_nonemp' & missing(en`nxt')
}

* Establishment-year totals of EE and EN transitions per quarter.
forvalues k = 1/4 {
	bysort currentid year: egen ee_e`k' = sum(ee`k')
	bysort currentid year: egen en_e`k' = sum(en`k')
}
compress

* Average establishment size within the year: total of the four quarter-end
* headcounts divided by the number of quarters with a non-zero headcount.
tempvar n_active avg_size
gen `n_active' = (st_e_q1 != 0) + (st_e_q2 != 0) + (st_e_q3 != 0) + (st_e_q4 != 0)
gen `avg_size' = (st_e_q1 + st_e_q2 + st_e_q3 + st_e_q4) / `n_active'
drop `n_active'
bysort currentid year: egen c = mean(`avg_size')
drop `avg_size'
compress

* Collapse to one record per establishment-year: order the rows, discard the
* person-level variables, then keep the first row of each group.
sort currentid year persnr
drop begorig endorig toid persnr q1 q2 q3 q4 ee1-en4

duplicates drop currentid year, force

* Optional establishment-size filters, controlled by the global macros
* $drop_size and $min_size (both must be defined as numbers; a value <= 0
* switches the corresponding filter off).
*
* The yearly average size `c' is deliberately NOT dropped inside the first
* filter: the second filter needs it as well, and dropping it there made the
* script abort with "variable c not found" whenever both globals were
* positive. `c' is removed later by the -keep- that selects the output
* variables.

* Filter 1: drop establishments whose mean yearly size over all their years
* is at or below $drop_size.
if $drop_size > 0 {
	bysort currentid: egen mean_size = mean(c)
	drop if mean_size <= $drop_size
	drop mean_size
}

* Filter 2: drop establishments whose smallest yearly size is below $min_size.
if $min_size > 0 {
	bysort currentid: egen tag_min = min(c)
	drop if tag_min < $min_size
	drop tag_min
}


* Quarterly transition rates: transitions attributed to quarter k divided by
* the quarter-end headcount (missing when the headcount is zero).
forvalues k = 1/4 {
	gen eer`k' = ee_e`k' / st_e_q`k'
	gen enr`k' = en_e`k' / st_e_q`k'
}

* Reshape to establishment-year-quarter: make four copies of every row and
* number them 1-4 within establishment-year. expand's marker is 0 for the
* original row and 1 for each copy, so its running sum plus one gives the
* quarter index.
expand 4, gen(q)
order q, after(year)
sort currentid year q
by currentid year: replace q = sum(q) + 1
su q

* For every quarter row pick the rate and headcount belonging to that quarter:
* start from quarter 1 and fill in quarters 2-4.
gen eer = eer1    if q == 1
gen enr = enr1    if q == 1
gen st  = st_e_q1 if q == 1
forvalues k = 2/4 {
	replace eer = eer`k'    if q == `k'
	replace enr = enr`k'    if q == `k'
	replace st  = st_e_q`k' if q == `k'
}
compress

* Previous-quarter headcount. The preceding row qualifies when it belongs to
* the same establishment and is either the previous quarter of the same year
* or quarter 4 of the previous year (for quarter 1).
sort currentid year q
local same_estab "currentid == currentid[_n-1]"
local prev_in_yr "(q - 1 == q[_n-1] & year == year[_n-1])"
local prev_yr_q4 "(q + 3 == q[_n-1] & year - 1 == year[_n-1])"
gen st_lag = st[_n-1] if `same_estab' & (`prev_in_yr' | `prev_yr_q4')
* No adjacent previous quarter on file: treat the lagged headcount as zero.
replace st_lag = 0 if missing(st_lag)

* Symmetric growth rate, bounded in [-2, 2]: change in headcount relative to
* the average of current and lagged headcount.
gen est_growth = (st - st_lag) / ((st + st_lag) / 2)
su est_growth
compress

* Rows with neither a transition rate nor a growth rate carry no information.
drop if missing(eer) & missing(est_growth)

* Align the previous quarter's transition rates with the growth into the
* current quarter. The preceding row must be the same establishment and the
* adjacent quarter (same year, or quarter 4 of the previous year).
local adjacent "currentid == currentid[_n-1] & ((q - 1 == q[_n-1] & year == year[_n-1]) | (q + 3 == q[_n-1] & year - 1 == year[_n-1]))"
foreach rate in eer enr {
	gen `rate'_new = `rate'[_n-1] if `adjacent'
}

* Growth of +2 means a zero lagged headcount, so there is no outflow to relate
* it to; -2 is removed as well to keep the distribution symmetric.
drop if est_growth ==  2
drop if est_growth == -2

* Replace the contemporaneous rates by their lagged counterparts.
foreach rate in eer enr {
	drop `rate'
	rename `rate'_new `rate'
}

* Final variable names for the establishment-quarter file.
rename currentid betnr
rename eer eerate
rename enr enrate
keep betnr year q eerate enrate st st_lag est_growth

* Keep only years strictly between $syear and $eyear.
drop if year <= $syear | year >= $eyear

* Store rates, growth and headcounts in double precision before saving.
foreach v of varlist est_growth eerate enrate st st_lag {
	recast double `v'
}

save "$temp/move_rates_growth_raw_ver$ver.dta", replace

* Hockey-stick function: group observations into growth-rate bins.
gen double bin = 0
* Placeholders for the bin averages pooled over all periods.
gen eerate_avgall = .
gen enrate_avgall = .

* Work with the growth rate in thousandths, rounded to an integer.
rename est_growth est_growth_orig
gen est_growth = round(est_growth_orig*1000)
su est_growth

* Bins are 50 wide and labelled by their midpoint (-1975, -1925, ..., 1975);
* each covers [midpoint - 25, midpoint + 25).
forvalues mid = -1975(50)1925 {
	quietly replace bin = `mid' if est_growth >= (`mid' - 25) & est_growth < (`mid' + 25)
}
* The top bin also includes its upper edge.
quietly replace bin = 1975 if est_growth >= (1975 - 25) & est_growth <= (1975 + 25)
tab bin, mis


* Mean EE and EN rate within each growth bin, pooled over all periods.
forvalues mid = -1975(50)1975 {
	foreach rate in eerate enrate {
		quietly summarize `rate' if bin == `mid'
		quietly replace `rate'_avgall = r(mean) if bin == `mid'
	}
}
* Every observation must have received an EE bin average. This fails if an
* observation sits in no bin (bin left at 0, which is not a bin midpoint) or
* in a bin without any non-missing eerate.
assert !missing(eerate_avgall)

* Express bin midpoints as growth rates again (thousandths -> units).
replace bin = bin / 1000
su bin

* Establishment-quarter file with bin assignment and bin averages.
* Note: est_growth here is the rounded growth rate in thousandths.
local panel_vars betnr year q st st_lag est_growth
local bin_vars   bin eerate_avgall enrate_avgall
keep `panel_vars' `bin_vars'
save "$results/estab_eeenrates_byyr_ver$ver.dta", replace

* Reduce to one row per bin for plotting.
keep `bin_vars'
duplicates drop bin, force
sort bin

* Transition graphs: EE (blue) and EN (black) bin averages against growth bin.
local plots  `"(line eerate_avgall bin, sort lcolor(blue)) (line enrate_avgall bin, sort lcolor(black))"'
local common `"ytitle(Probability) xtitle(Growth rate (quarterly)) legend(order(1 "EE" 2 "EN"))"'

* Full range of growth bins.
twoway `plots', ///
	`common' ylabel(0(.1).6)
graph export "$graph/hockey_stick_ver$ver.pdf", replace

* Restricted to bins within [-.975, .975], with fixed axis ranges.
twoway `plots' ///
	if bin >= -.975 & bin <= .975, ///
	`common' ylabel(0(.1).5) ///
	graphr(color(gs16)) ysc(r(0 0.5)) xsc(r(-1 1))
graph export "$graph/hockey_stick_cut_ver$ver.pdf", replace
